library(survminer)
library(survival)
library(tidyverse)
library(DT)

PATH <- file.path(Sys.getenv("MLAB"), "projects/brcameta/exosome/4t1_brca_brain_mets/")
TCGAPATH <- file.path(Sys.getenv("CBM"),"TCGA-GDC/")
DPATH <- file.path(Sys.getenv("CBM"),"otherStudies/RNAseq/2022-06-03-DenisExosomeBrCaBrainMets/")

do_save <- TRUE

Loading Data

tcga_gsva <- readRDS(file.path(PATH, "data/tcga_gsva_res.rds"))
metabric_gsva <- readRDS(file.path(PATH, "data/metabric_gsva_res.rds"))

#Signatures

all_sigs <- readRDS(file.path(PATH,"data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) > 1]

all_sigs_human <- lapply(all_sigs, babelgene::orthologs, species="mouse", human=FALSE)
all_sigs_human <- lapply(all_sigs_human, function (x) x %>% dplyr::pull(human_symbol))

Data Filtering

na_filter <- !is.na(tcga_gsva$vital_status)
missing_death_day_filter <- !((tcga_gsva$vital_status == "Dead") & is.na(tcga_gsva$days_to_death))
subtype_filter <- !is.na(tcga_gsva$subtype_selected)
data_filter <- na_filter & missing_death_day_filter & subtype_filter

tcga_gsva <- tcga_gsva[,data_filter]

Censoring: If samples are alive, then they lived at least as long as the day to their last followup. For 5 year threshold, those who died after 5 years, should be right-censored, and labeled to have lived at least as long as ‘days to death’ + 1.

pData(tcga_gsva) <- pData(tcga_gsva) %>% 
  rownames_to_column("ID") %>% 
  mutate(time = if_else(!is.na(days_to_death),days_to_death,days_to_last_follow_up)) %>%
  mutate(time_5 = if_else(as.numeric(time) < 1825.0, as.numeric(time) , 1826.0)) %>%
  mutate(vital_status_1 = if_else(vital_status == "Alive", 1, 2)) %>%
  mutate(vital_status_5 = if_else(vital_status == "Dead" & (time_5 > 1825.0), 1, vital_status_1)) %>% 
  column_to_rownames("ID")

pData(metabric_gsva) <- pData(metabric_gsva) %>% 
  mutate(time = OS_MONTHS*30.437) %>%
  mutate(time_5 = if_else(as.numeric(time) < 1825.0, as.numeric(time) , 1826.0)) %>%
  mutate(vital_status_1 = if_else(OS_STATUS == "LIVING", 1, 2)) %>%
  mutate(vital_status_5 = if_else(OS_STATUS == "DECEASED" & (time_5 > 1825.0), 1, vital_status_1))

Survfit All subtypes

tcga_threshold_ir_c <- median(exprs(tcga_gsva["ir_c_up",]) - exprs(tcga_gsva["ir_c_down"]))
tcga_threshold_is_c <- median(exprs(tcga_gsva["is_c_up",])-exprs(tcga_gsva["is_c_down",]))
tcga_threshold_ir_is <- median(exprs(tcga_gsva["ir_is_up",])-exprs(tcga_gsva["ir_is_down",]))
metabric_threshold_ir_c <- median(exprs(metabric_gsva["ir_c_up",]) - exprs(metabric_gsva["ir_c_down"]))
metabric_threshold_is_c <- median(exprs(metabric_gsva["is_c_up",])-exprs(metabric_gsva["is_c_down",]))
metabric_threshold_ir_is <- median(exprs(metabric_gsva["ir_is_up",])-exprs(metabric_gsva["ir_is_down",]))

tcga_threshold_ir_c_up <- median(exprs(tcga_gsva["ir_c_up",]))
tcga_threshold_is_c_up <- median(exprs(tcga_gsva["is_c_up",]))
tcga_threshold_ir_is_up <- median(exprs(tcga_gsva["ir_is_up",]))
metabric_threshold_ir_c_up <- median(exprs(metabric_gsva["ir_c_up",]))
metabric_threshold_is_c_up <- median(exprs(metabric_gsva["is_c_up",]))
metabric_threshold_ir_is_up <- median(exprs(metabric_gsva["ir_is_up",]))

Adding GSVA data

tcga_gsva$ir_c <- t(exprs(tcga_gsva["ir_c_up",]) - exprs(tcga_gsva["ir_c_down"]))
tcga_gsva$is_c <- t(exprs(tcga_gsva["is_c_up",])-exprs(tcga_gsva["is_c_down",]))
tcga_gsva$ir_is <- t(exprs(tcga_gsva["ir_is_up",])-exprs(tcga_gsva["ir_is_down",]))
tcga_gsva$ir_c_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_c <= tcga_threshold_ir_c, "low", "high"))
tcga_gsva$is_c_stat <- with(tcga_gsva, ifelse(tcga_gsva$is_c <= tcga_threshold_is_c, "low", "high"))
tcga_gsva$ir_is_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_is <= tcga_threshold_ir_is, "low", "high"))

tcga_gsva$ir_c_up <- t(exprs(tcga_gsva["ir_c_up",]))
tcga_gsva$is_c_up <- t(exprs(tcga_gsva["is_c_up",]))
tcga_gsva$ir_is_up <- t(exprs(tcga_gsva["ir_is_up",]))
tcga_gsva$ir_c_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_c_up <= tcga_threshold_ir_c_up, "low", "high"))
tcga_gsva$is_c_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$is_c_up <= tcga_threshold_is_c_up, "low", "high"))
tcga_gsva$ir_is_up_stat <- with(tcga_gsva, ifelse(tcga_gsva$ir_is_up <= tcga_threshold_ir_is_up, "low", "high"))

metabric_gsva$ir_c <- t(exprs(metabric_gsva["ir_c_up",]) - exprs(metabric_gsva["ir_c_down"]))
metabric_gsva$is_c <- t(exprs(metabric_gsva["is_c_up",])-exprs(metabric_gsva["is_c_down",]))
metabric_gsva$ir_is <- t(exprs(metabric_gsva["ir_is_up",])-exprs(metabric_gsva["ir_is_down",]))
metabric_gsva$ir_c_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_c <= metabric_threshold_ir_c, "low", "high"))
metabric_gsva$is_c_stat <- with(metabric_gsva, ifelse(metabric_gsva$is_c <= metabric_threshold_is_c, "low", "high"))
metabric_gsva$ir_is_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_is <= metabric_threshold_ir_is, "low", "high"))

metabric_gsva$ir_c_up <- t(exprs(metabric_gsva["ir_c_up",]))
metabric_gsva$is_c_up <- t(exprs(metabric_gsva["is_c_up",]))
metabric_gsva$ir_is_up <- t(exprs(metabric_gsva["ir_is_up",]))
metabric_gsva$ir_c_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_c_up <= metabric_threshold_ir_c_up, "low", "high"))
metabric_gsva$is_c_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$is_c_up <= metabric_threshold_is_c_up, "low", "high"))
metabric_gsva$ir_is_up_stat <- with(metabric_gsva, ifelse(metabric_gsva$ir_is_up <= metabric_threshold_ir_is_up, "low", "high"))

IR_C_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_c_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_c_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_c_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_c_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

IR_C

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_c_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_c_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_c_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_c_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

IS_C_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$is_c_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$is_c_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$is_c_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$is_c_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

IS_C

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$is_c_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$is_c_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$is_c_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$is_c_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

IR_IS_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_is_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_is_up_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_is_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_is_up_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

IR_IS

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time), tcga_gsva$vital_status_1) ~ tcga_gsva$ir_is_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva$time_5), tcga_gsva$vital_status_5) ~ tcga_gsva$ir_is_stat),
    data = tcga_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time), metabric_gsva$vital_status_1) ~ metabric_gsva$ir_is_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva$time_5), metabric_gsva$vital_status_5) ~ metabric_gsva$ir_is_stat),
    data = metabric_gsva, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'METABRIC w/ 5-year threshold')

Survfit Basal

Thresholds

tcga_basal_filter <- tcga_gsva$subtype_selected == "BRCA.Basal"
tcga_gsva_basal <- tcga_gsva[,tcga_basal_filter]
metabric_basal_filter <- metabric_gsva$Pam50_SUBTYPE == "Basal"
metabric_gsva_basal <- metabric_gsva[,metabric_basal_filter]

tcga_threshold_ir_c <- median(exprs(tcga_gsva_basal["ir_c_up",])-exprs(tcga_gsva_basal["ir_c_down",]))
tcga_threshold_is_c <- median(exprs(tcga_gsva_basal["is_c_up",])-exprs(tcga_gsva_basal["is_c_down",]))
tcga_threshold_ir_is <- median(exprs(tcga_gsva_basal["ir_is_up",])-exprs(tcga_gsva_basal["ir_is_down",]))
metabric_threshold_ir_c <- median(exprs(metabric_gsva_basal["ir_c_up",])-exprs(metabric_gsva_basal["ir_c_down",]))
metabric_threshold_is_c <- median(exprs(metabric_gsva_basal["is_c_up",])-exprs(metabric_gsva_basal["is_c_down",]))
metabric_threshold_ir_is <- median(exprs(metabric_gsva_basal["ir_is_up",])-exprs(metabric_gsva_basal["ir_is_down",]))

tcga_threshold_ir_c_up <- median(exprs(tcga_gsva_basal["ir_c_up",]))
tcga_threshold_is_c_up <- median(exprs(tcga_gsva_basal["is_c_up",]))
tcga_threshold_ir_is_up <- median(exprs(tcga_gsva_basal["ir_is_up",]))
metabric_threshold_ir_c_up <- median(exprs(metabric_gsva_basal["ir_c_up",]))
metabric_threshold_is_c_up <- median(exprs(metabric_gsva_basal["is_c_up",]))
metabric_threshold_ir_is_up <- median(exprs(metabric_gsva_basal["ir_is_up",]))

Adding GSVA data

tcga_gsva_basal$ir_c <- t(exprs(tcga_gsva_basal["ir_c_up",])-exprs(tcga_gsva_basal["ir_c_down",]))
tcga_gsva_basal$is_c <- t(exprs(tcga_gsva_basal["is_c_up",])-exprs(tcga_gsva_basal["is_c_down",]))
tcga_gsva_basal$ir_is <- t(exprs(tcga_gsva_basal["ir_is_up",])-exprs(tcga_gsva_basal["ir_is_down",]))
metabric_gsva_basal$ir_c <- t(exprs(metabric_gsva_basal["ir_c_up",])-exprs(metabric_gsva_basal["ir_c_down",]))
metabric_gsva_basal$is_c <- t(exprs(metabric_gsva_basal["is_c_up",])-exprs(metabric_gsva_basal["is_c_down",]))
metabric_gsva_basal$ir_is <- t(exprs(metabric_gsva_basal["ir_is_up",])-exprs(metabric_gsva_basal["ir_is_down",]))

tcga_gsva_basal$ir_c_up <- t(exprs(tcga_gsva_basal["ir_c_up",]))
tcga_gsva_basal$is_c_up <- t(exprs(tcga_gsva_basal["is_c_up",]))
tcga_gsva_basal$ir_is_up <- t(exprs(tcga_gsva_basal["ir_is_up",]))
metabric_gsva_basal$ir_c_up <- t(exprs(metabric_gsva_basal["ir_c_up",]))
metabric_gsva_basal$is_c_up <- t(exprs(metabric_gsva_basal["is_c_up",]))
metabric_gsva_basal$ir_is_up <- t(exprs(metabric_gsva_basal["ir_is_up",]))



tcga_gsva_basal$ir_c_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_c <= tcga_threshold_ir_c, "low", "high"))
tcga_gsva_basal$is_c_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$is_c <= tcga_threshold_is_c, "low", "high"))
tcga_gsva_basal$ir_is_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_is <= tcga_threshold_ir_is, "low", "high"))
tcga_gsva_basal$ir_c_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_c_up <= tcga_threshold_ir_c_up, "low", "high"))
tcga_gsva_basal$is_c_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$is_c_up <= tcga_threshold_is_c_up, "low", "high"))
tcga_gsva_basal$ir_is_up_stat <- with(tcga_gsva_basal, ifelse(tcga_gsva_basal$ir_is_up <= tcga_threshold_ir_is_up, "low", "high"))

metabric_gsva_basal$ir_c_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_c <= metabric_threshold_ir_c, "low", "high"))
metabric_gsva_basal$is_c_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$is_c <= metabric_threshold_is_c, "low", "high"))
metabric_gsva_basal$ir_is_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_is <= metabric_threshold_ir_is, "low", "high"))
metabric_gsva_basal$ir_c_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_c_up <= metabric_threshold_ir_c_up, "low", "high"))
metabric_gsva_basal$is_c_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$is_c_up <= metabric_threshold_is_c_up, "low", "high"))
metabric_gsva_basal$ir_is_up_stat <- with(metabric_gsva_basal, ifelse(metabric_gsva_basal$ir_is_up <= metabric_threshold_ir_is_up, "low", "high"))

IR_C_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_c_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_c_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_c_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_c_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

IR_C

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_c_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_c_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_c_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_c_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

IS_C_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$is_c_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$is_c_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$is_c_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$is_c_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

IS_C

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$is_c_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$is_c_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$is_c_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$is_c_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

IR_IS_UP

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_is_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title='TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_is_up_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_is_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_is_up_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

IR_IS

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time), tcga_gsva_basal$vital_status_1) ~ tcga_gsva_basal$ir_is_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title='TCGA')

ggsurvplot(
    fit = survfit(Surv(as.numeric(tcga_gsva_basal$time_5), tcga_gsva_basal$vital_status_5) ~ tcga_gsva_basal$ir_is_stat),
    data = tcga_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'TCGA w/ 5-year threshold')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time), metabric_gsva_basal$vital_status_1) ~ metabric_gsva_basal$ir_is_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric')

ggsurvplot(
    fit = survfit(Surv(as.numeric(metabric_gsva_basal$time_5), metabric_gsva_basal$vital_status_5) ~ metabric_gsva_basal$ir_is_stat),
    data = metabric_gsva_basal, 
    xlab = "Days", 
    ylab = "Overall survival probability",
    conf.int = TRUE,
    pval = TRUE,
    title = 'Metabric w/ 5-year threshold')

Cox Proportional Hazard

TCGA

pData(tcga_gsva)$subtype_selected <- relevel(factor(pData(tcga_gsva)$subtype_selected), "BRCA.Normal")
tcga.cox1 <- coxph(Surv(as.numeric(time), vital_status_1) ~  subtype_selected , data=pData(tcga_gsva))
tcga.cox2 <- coxph(Surv(as.numeric(time), vital_status_1) ~  ir_is + subtype_selected , data=pData(tcga_gsva))
anova(tcga.cox1, tcga.cox2)
summary(tcga.cox2)
## Call:
## coxph(formula = Surv(as.numeric(time), vital_status_1) ~ ir_is + 
##     subtype_selected, data = pData(tcga_gsva))
## 
##   n= 1208, number of events= 199 
## 
##                               coef exp(coef) se(coef)      z Pr(>|z|)    
## ir_is                       0.3748    1.4547   0.1705  2.198  0.02796 *  
## subtype_selectedBRCA.Basal -0.6507    0.5217   0.2457 -2.649  0.00808 ** 
## subtype_selectedBRCA.Her2  -0.1176    0.8891   0.2848 -0.413  0.67974    
## subtype_selectedBRCA.LumA  -0.8958    0.4083   0.1903 -4.707 2.51e-06 ***
## subtype_selectedBRCA.LumB  -0.3552    0.7011   0.2325 -1.528  0.12658    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                            exp(coef) exp(-coef) lower .95 upper .95
## ir_is                         1.4547     0.6874    1.0414    2.0321
## subtype_selectedBRCA.Basal    0.5217     1.9170    0.3223    0.8443
## subtype_selectedBRCA.Her2     0.8891     1.1248    0.5088    1.5537
## subtype_selectedBRCA.LumA     0.4083     2.4493    0.2812    0.5929
## subtype_selectedBRCA.LumB     0.7011     1.4264    0.4445    1.1057
## 
## Concordance= 0.607  (se = 0.025 )
## Likelihood ratio test= 33.12  on 5 df,   p=4e-06
## Wald test            = 34.46  on 5 df,   p=2e-06
## Score (logrank) test = 36.28  on 5 df,   p=8e-07

Metabric

pData(metabric_gsva)$Pam50_SUBTYPE <- relevel(factor(pData(metabric_gsva)$Pam50_SUBTYPE), "Normal")
metabric.cox1 <- coxph(Surv(as.numeric(time), vital_status_1) ~  Pam50_SUBTYPE , data=pData(metabric_gsva))
metabric.cox2 <- coxph(Surv(as.numeric(time), vital_status_1) ~  ir_is + Pam50_SUBTYPE , data=pData(metabric_gsva))
anova(metabric.cox1, metabric.cox2)
summary(metabric.cox2)
## Call:
## coxph(formula = Surv(as.numeric(time), vital_status_1) ~ ir_is + 
##     Pam50_SUBTYPE, data = pData(metabric_gsva))
## 
##   n= 1980, number of events= 1143 
## 
##                        coef exp(coef) se(coef)      z Pr(>|z|)    
## ir_is               0.13762   1.14754  0.07288  1.888 0.058967 .  
## Pam50_SUBTYPEBasal  0.14860   1.16021  0.12897  1.152 0.249236    
## Pam50_SUBTYPEHer2   0.45151   1.57068  0.12873  3.507 0.000453 ***
## Pam50_SUBTYPELumA  -0.05352   0.94789  0.11374 -0.471 0.637988    
## Pam50_SUBTYPELumB   0.39747   1.48805  0.11622  3.420 0.000626 ***
## Pam50_SUBTYPENC     0.60061   1.82323  0.45912  1.308 0.190813    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                    exp(coef) exp(-coef) lower .95 upper .95
## ir_is                 1.1475     0.8714    0.9948     1.324
## Pam50_SUBTYPEBasal    1.1602     0.8619    0.9011     1.494
## Pam50_SUBTYPEHer2     1.5707     0.6367    1.2204     2.021
## Pam50_SUBTYPELumA     0.9479     1.0550    0.7585     1.185
## Pam50_SUBTYPELumB     1.4881     0.6720    1.1849     1.869
## Pam50_SUBTYPENC       1.8232     0.5485    0.7414     4.484
## 
## Concordance= 0.576  (se = 0.009 )
## Likelihood ratio test= 50.03  on 6 df,   p=5e-09
## Wald test            = 50.78  on 6 df,   p=3e-09
## Score (logrank) test = 51.47  on 6 df,   p=2e-09